On this page, we will be exploring the overall trends for our key outcome- sleeping hours per day, and how these outcomes correlate with the predictors, including demographic variables like age, race, sex, education level, and poverty status.

library(tidyverse)
library(patchwork)
library(knitr)
library(dplyr)
library(gganimate)
library(gifski)
library(png)
library(plotly)
library(ggridges)

Education level

We first want to get the distribution of sleeping hours less than 7 hours across the five different education levels. We will construct a bar chart tabulating the average sleeping hours per week in each of the five education levels. Gender consideration was also added into to the bar chart in order to see a difference between female and male in each category.

 edu_plot=slp_df %>%
  filter(weekday_slp_hr<7)%>%
  group_by(education_level,gender) %>%
  summarize(ave_sleep=mean((weekday_slp_hr*5+weekend_slp_hr*2)/7)) %>% 
  ungroup() %>%
  mutate(education_level=fct_reorder(education_level,ave_sleep)) %>%
  ggplot(aes(x=education_level,y=ave_sleep,fill=gender))+ geom_bar(width=0.5,stat="identity")+
  viridis::scale_fill_viridis(
    name = "gender",
    discrete = TRUE
  ) + geom_text(aes(label = round(ave_sleep, 2)),position = position_stack(vjust=0.9), color = "white", size = 4)+
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust=1))+labs(
    title = "Distribution of sleeping hours across education level",
    x = "Education Level",
    y = "Average Sleeping hours per day"
    )
edu_plot

From the above plot, high school graduates has the least sleeping hours, while college graduates or above has the highest. Female and male does not have a significant difference in both groups.

Race

A heat map was made to visualize sleeping hours less than 7 hours among different races. The below plot indicates that there is a large number of people who has a sleeping hour of 6 hours per day among the Non-Hispanic black group. There are also many Non-Hispanic Whites who has an average of 6 hours per day.

race_plot=slp_df %>%
  filter(weekday_slp_hr<7) %>%
  mutate(sleep_ave=(weekday_slp_hr*5+weekend_slp_hr*2)/7) %>%
  group_by(race,sleep_ave) %>%
  summarise(obs=n()) %>%
  plot_ly(
    x = ~sleep_ave, y = ~race, z = ~obs, type = "heatmap", colors = "BuPu"
  ) %>%
  colorbar(title = "Number of People", x = 1, y = 0.5) 
layout(race_plot, xaxis = list(title = "Average Sleeping Hours Per Day"), yaxis = list(title = "Race"))

Race Gender Gap by Education Level

The below plot demonstrates the gender gap in the patients for different races. Male outnumber female for sleeping less than 7 hours per day for all race, except Non-Hispanic Black and Non-Hispanic Asian. The bubble represents the degree of the gap, along inlcuding their education level.

gender_plot=slp_df %>%
  filter(weekday_slp_hr<7) %>%
  group_by(race,education_level) %>%
  summarize(total_f=sum(gender=="female"),
            total_m=sum(gender=="male"),
            gap=total_m-total_f) %>%
  mutate(text_lable=str_c("Race=",race,"\nEducation level: ", education_level)) %>%
  plot_ly(x=~total_m,y=~total_f,text=~text_lable,color=~race,size=~gap,type="scatter",mode="markers",
          colors="viridis",sizes = c(50, 700), marker = list(opacity = 0.7))

layout(gender_plot, title = "Race Gender Gap by Education Level", xaxis = list(title = "Number of Male Sleeping less than 7 hrs"), yaxis = list(title = "Number of Female Sleeping less than 7 hrs"))

Poverty Status

We can observe that people who are in poverty tend to have less sleeping hours than those who are not in poverty.

income_df=slp_df %>%
  filter(weekday_slp_hr<7) %>%
  mutate(ip_stat=case_when(income_poverty_ratio > 1 ~ "not in poverty",
                           income_poverty_ratio < 1~ "in poverty",
                           income_poverty_ratio == 1~ "in poverty")) %>%
  ggplot(aes(x=weekday_slp_hr,y=ip_stat,fill=ip_stat))+
  geom_density_ridges(
    aes(point_color = ip_stat, point_shape = ip_stat,point_fill=ip_stat),
    alpha = .3, point_alpha = 0.7)+
   scale_x_continuous(
    breaks = c(2, 4, 6), 
    labels = c("2hrs", "4hrs", "6hrs"),
    limits = c(2, 6)
    )+labs(
    x = " Average Sleeping Hours"
    )
  
box_plot=
  slp_df %>%
  filter(weekday_slp_hr<7) %>%
  mutate(ip_stat=case_when(income_poverty_ratio > 1 ~ "not in poverty",
                           income_poverty_ratio < 1~ "in poverty",
                           income_poverty_ratio == 1~ "in poverty")) %>%
  mutate(sleep_ave=(weekday_slp_hr*5+weekend_slp_hr*2)/7) %>%
  ggplot(aes(x=ip_stat,y=sleep_ave))+geom_boxplot(aes(fill = ip_stat), alpha = 0.3)+
  geom_hline(aes(yintercept=median(sleep_ave),
            color="red", linetype="dashed"))+
  geom_text(aes(0, median(weekday_slp_hr), label = "sleep hours median"), vjust = -0.5, hjust = 0, color = "red")+labs(
    x = " Poverty Status",
    y = "Average Sleeping Hours"
    )

comb=income_df+box_plot
comb+plot_annotation(
  title = "Sleeping Hours By Poverty Status"
) 

Age

A bar plot was made to see the distribution of average sleeping hours less than 7 hours in different age groups. People age between 71 and 80 has the least sleeping hours with about 5.66 hours per day.

age_group= slp_df%>%
  filter(weekday_slp_hr<7) %>%
  mutate(age_gp=case_when(age>=20 & age<=30 ~ "20-30",
                          age>=31 &age <=40 ~ "31-40",
                          age>=41 &age<=50 ~ "41-50",
                          age>=51 &age<=60 ~ "51-60",
                          age>=61 &age<=70 ~ "61-70",
                          age>=71 & age <=80 ~ "71-80")) %>%
  group_by(age_gp) %>%
  summarise(ave_slp=mean((weekday_slp_hr*5+weekend_slp_hr*2)/7))%>%
  ungroup() %>%
  mutate(age_gp=fct_reorder(age_gp,ave_slp)) %>%
  ggplot(aes(x=age_gp,y=ave_slp,fill=age_gp))+ geom_bar(stat="identity")+ scale_fill_viridis_d()+
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust=1))+
  geom_text(aes(label = round(ave_slp, 2)),position = position_stack(vjust=0.9), color = "white", size = 4)+labs(x="Age Group",y="Average Sleeping Hours Per Day",title="Distribution of Sleeping Hours across Age Group")
age_group